msg_tool\scripts\bgi\archive/
dsc.rs

1//! Buriko General Interpreter/Ethornell compressed file in archive
2use crate::ext::io::*;
3use crate::ext::vec::*;
4use crate::scripts::base::*;
5use crate::types::*;
6use crate::utils::bit_stream::*;
7use crate::utils::num_range::*;
8use anyhow::Result;
9use rand::Rng;
10use std::collections::BinaryHeap;
11use std::io::{Seek, Write};
12
13#[derive(Debug)]
14struct HuffmanCode {
15    code: u16,
16    depth: u8,
17}
18
19impl std::cmp::PartialEq for HuffmanCode {
20    fn eq(&self, other: &Self) -> bool {
21        self.code == other.code && self.depth == other.depth
22    }
23}
24
25impl std::cmp::Eq for HuffmanCode {}
26
27impl std::cmp::PartialOrd for HuffmanCode {
28    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
29        let cmp = self.depth.cmp(&other.depth);
30        if cmp == std::cmp::Ordering::Equal {
31            Some(self.code.cmp(&other.code))
32        } else {
33            Some(cmp)
34        }
35    }
36}
37
38impl std::cmp::Ord for HuffmanCode {
39    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
40        let cmp = self.depth.cmp(&other.depth);
41        if cmp == std::cmp::Ordering::Equal {
42            self.code.cmp(&other.code)
43        } else {
44            cmp
45        }
46    }
47}
48
49#[derive(Clone, Debug)]
50struct HuffmanNode {
51    is_parent: bool,
52    code: Option<u16>,
53    left_index: usize,
54    right_index: usize,
55}
56
57/// Decoder for Buriko General Interpreter/Ethornell compressed files (DSC format).
58pub struct DscDecoder<'a> {
59    stream: MsbBitStream<MemReaderRef<'a>>,
60    key: u32,
61    magic: u32,
62    output_size: u32,
63    dec_count: u32,
64}
65
66impl<'a> DscDecoder<'a> {
67    /// Creates a new DscDecoder from the given data slice.
68    pub fn new(data: &'a [u8]) -> Result<Self> {
69        let mut reader = MemReaderRef::new(data);
70        let magic = (reader.read_u16()? as u32) << 16;
71        reader.pos = 0x10;
72        let key = reader.read_u32()?;
73        let output_size = reader.read_u32()?;
74        let dec_count = reader.read_u32()?;
75        let stream = MsbBitStream::new(reader);
76        Ok(DscDecoder {
77            stream,
78            key,
79            magic,
80            output_size,
81            dec_count,
82        })
83    }
84
85    /// Unpacks the DSC file and returns the decompressed data.
86    pub fn unpack(mut self) -> Result<Vec<u8>> {
87        self.stream.m_input.pos = 0x20;
88        let mut codes = Vec::new();
89        for i in 0..512 {
90            let src = self.stream.m_input.read_u8()?;
91            let depth = src.overflowing_sub(self.update_key()).0;
92            if depth > 0 {
93                codes.push(HuffmanCode { code: i, depth })
94            }
95        }
96        codes.sort();
97        let root = Self::create_huffman_tree(codes);
98        self.huffman_decompress(root)
99    }
100
101    fn create_huffman_tree(codes: Vec<HuffmanCode>) -> Vec<HuffmanNode> {
102        let mut trees = Vec::with_capacity(1024);
103        trees.resize(
104            1024,
105            HuffmanNode {
106                is_parent: false,
107                code: None,
108                left_index: 0,
109                right_index: 0,
110            },
111        );
112        let mut left_index = vec![0usize; 512];
113        let mut right_index = vec![0usize; 512];
114        let mut next_node_index = 1usize;
115        let mut depth_nodes = 1usize;
116        let mut depth = 0u8;
117        let mut left_child = true;
118        let mut n = 0;
119        while n < codes.len() {
120            let huffman_node_index = left_child;
121            left_child = !left_child;
122            let mut depth_existed_nodes = 0;
123            while n < codes.len() && codes[n].depth == depth {
124                let index = if huffman_node_index {
125                    left_index[depth_existed_nodes]
126                } else {
127                    right_index[depth_existed_nodes]
128                };
129                trees[index].code = Some(codes[n].code);
130                n += 1;
131                depth_existed_nodes += 1;
132            }
133            let depth_nodes_to_create = depth_nodes - depth_existed_nodes;
134            for i in 0..depth_nodes_to_create {
135                let index = if huffman_node_index {
136                    left_index[depth_existed_nodes + i]
137                } else {
138                    right_index[depth_existed_nodes + i]
139                };
140                let node = &mut trees[index];
141                node.is_parent = true;
142                if left_child {
143                    left_index[i * 2] = next_node_index;
144                    node.left_index = next_node_index;
145                    next_node_index += 1;
146                    left_index[i * 2 + 1] = next_node_index;
147                    node.right_index = next_node_index;
148                    next_node_index += 1;
149                } else {
150                    right_index[i * 2] = next_node_index;
151                    node.left_index = next_node_index;
152                    next_node_index += 1;
153                    right_index[i * 2 + 1] = next_node_index;
154                    node.right_index = next_node_index;
155                    next_node_index += 1;
156                }
157            }
158            depth += 1;
159            depth_nodes = depth_nodes_to_create * 2;
160        }
161        trees
162    }
163
164    fn huffman_decompress(&mut self, nodes: Vec<HuffmanNode>) -> Result<Vec<u8>> {
165        let output_size = self.output_size as usize;
166        let mut output = Vec::with_capacity(output_size);
167        let mut dst = 0;
168        output.resize(output_size, 0);
169        for _ in 0..self.dec_count {
170            let mut current_node = &nodes[0];
171            loop {
172                let bit = self.stream.get_next_bit()?;
173                if !bit {
174                    current_node = &nodes[current_node.left_index]
175                } else {
176                    current_node = &nodes[current_node.right_index]
177                }
178                if !current_node.is_parent {
179                    break;
180                }
181            }
182            let code = *current_node.code.as_ref().unwrap();
183            if code >= 256 {
184                let mut offset = self.stream.get_bits(12)?;
185                let count = ((code & 0xFF) + 2) as usize;
186                offset += 2;
187                output.copy_overlapped(dst - offset as usize, dst, count);
188                dst += count;
189            } else {
190                output[dst] = code as u8;
191                dst += 1;
192            }
193        }
194        if dst != output_size {
195            eprintln!(
196                "Warning: Output size mismatch, expected {}, got {}",
197                self.output_size, dst
198            );
199            crate::COUNTER.inc_warning();
200        }
201        Ok(output)
202    }
203
204    fn update_key(&mut self) -> u8 {
205        let v0 = 20021 * (self.key & 0xffff);
206        let mut v1 = self.magic | (self.key >> 16);
207        v1 = v1
208            .overflowing_mul(20021)
209            .0
210            .overflowing_add(self.key.overflowing_mul(346).0)
211            .0;
212        v1 = (v1 + (v0 >> 16)) & 0xffff;
213        self.key = (v1 << 16) + (v0 & 0xffff) + 1;
214        v1 as u8
215    }
216}
217
218#[derive(Debug, Clone, Copy)]
219enum LzssOp {
220    Literal(u8),
221    Match { len: u16, offset: u16 },
222}
223
224#[derive(Debug)]
225struct FreqNode {
226    freq: u32,
227    symbol: Option<u16>,
228    left: Option<Box<FreqNode>>,
229    right: Option<Box<FreqNode>>,
230}
231impl PartialEq for FreqNode {
232    fn eq(&self, other: &Self) -> bool {
233        self.freq == other.freq
234    }
235}
236impl Eq for FreqNode {}
237impl PartialOrd for FreqNode {
238    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
239        Some(self.cmp(other))
240    }
241}
242impl Ord for FreqNode {
243    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
244        other.freq.cmp(&self.freq)
245    }
246}
247
248fn calculate_huffman_depths(freqs: &[u32]) -> Vec<u8> {
249    const MAX_DEPTH: u8 = 9;
250
251    // 收集所有非零频率的符号
252    let mut symbols_with_freq: Vec<(u16, u32)> = freqs
253        .iter()
254        .enumerate()
255        .filter_map(|(symbol, &freq)| {
256            if freq > 0 {
257                Some((symbol as u16, freq))
258            } else {
259                None
260            }
261        })
262        .collect();
263
264    let mut depths = vec![0u8; 512];
265
266    if symbols_with_freq.is_empty() {
267        return depths;
268    }
269
270    if symbols_with_freq.len() == 1 {
271        depths[symbols_with_freq[0].0 as usize] = 1;
272        return depths;
273    }
274
275    // 使用受限Huffman算法
276    loop {
277        let current_depths = build_huffman_tree(&symbols_with_freq);
278        let max_depth = current_depths.iter().max().copied().unwrap_or(0);
279
280        if max_depth <= MAX_DEPTH {
281            // 将深度映射回原始数组
282            for &(symbol, _) in &symbols_with_freq {
283                let symbol_index = symbols_with_freq
284                    .iter()
285                    .position(|(s, _)| *s == symbol)
286                    .unwrap();
287                depths[symbol as usize] = current_depths[symbol_index];
288            }
289            break;
290        }
291
292        // 如果深度超限,调整频率
293        adjust_frequencies_for_depth_limit(&mut symbols_with_freq);
294    }
295
296    depths
297}
298
299fn build_huffman_tree(symbols_with_freq: &[(u16, u32)]) -> Vec<u8> {
300    let mut heap = BinaryHeap::new();
301
302    // 添加所有叶子节点
303    for &(symbol, freq) in symbols_with_freq {
304        heap.push(FreqNode {
305            freq,
306            symbol: Some(symbol),
307            left: None,
308            right: None,
309        });
310    }
311
312    // 构建Huffman树
313    while heap.len() > 1 {
314        let node1 = heap.pop().unwrap();
315        let node2 = heap.pop().unwrap();
316        let new_node = FreqNode {
317            freq: node1.freq + node2.freq,
318            symbol: None,
319            left: Some(Box::new(node1)),
320            right: Some(Box::new(node2)),
321        };
322        heap.push(new_node);
323    }
324
325    // 计算深度
326    let mut depths = vec![0u8; symbols_with_freq.len()];
327    if let Some(root) = heap.pop() {
328        calculate_depths(&root, 0, symbols_with_freq, &mut depths);
329    }
330
331    depths
332}
333
334fn calculate_depths(
335    node: &FreqNode,
336    depth: u8,
337    symbols_with_freq: &[(u16, u32)],
338    depths: &mut [u8],
339) {
340    if let Some(symbol) = node.symbol {
341        let symbol_index = symbols_with_freq
342            .iter()
343            .position(|(s, _)| *s == symbol)
344            .unwrap();
345        depths[symbol_index] = if depth == 0 { 1 } else { depth };
346    } else {
347        if let Some(ref left) = node.left {
348            calculate_depths(left, depth + 1, symbols_with_freq, depths);
349        }
350        if let Some(ref right) = node.right {
351            calculate_depths(right, depth + 1, symbols_with_freq, depths);
352        }
353    }
354}
355
356fn adjust_frequencies_for_depth_limit(symbols_with_freq: &mut [(u16, u32)]) {
357    // 按频率排序
358    symbols_with_freq.sort_by(|a, b| a.1.cmp(&b.1));
359
360    // 使用Package-Merge算法的简化版本
361    // 这里使用一个启发式方法:增加低频符号的频率
362    let min_freq = symbols_with_freq[0].1;
363    let adjustment = (min_freq as f64 * 0.1).max(1.0) as u32;
364
365    // 找到频率最低的几个符号并调整它们的频率
366    let num_to_adjust = (symbols_with_freq.len() / 4).max(1);
367    for i in 0..num_to_adjust.min(symbols_with_freq.len()) {
368        symbols_with_freq[i].1 += adjustment;
369    }
370}
371
372fn generate_canonical_codes(depths: &[u8]) -> Vec<Option<(u16, u8)>> {
373    let mut codes_with_depths = vec![];
374    for (symbol, &depth) in depths.iter().enumerate() {
375        if depth > 0 {
376            codes_with_depths.push((symbol as u16, depth));
377        }
378    }
379    codes_with_depths.sort_by(|a, b| {
380        let depth_cmp = a.1.cmp(&b.1);
381        if depth_cmp == std::cmp::Ordering::Equal {
382            a.0.cmp(&b.0)
383        } else {
384            depth_cmp
385        }
386    });
387
388    let mut huffman_codes = vec![None; 512];
389    let mut current_code = 0u16;
390    let mut last_depth = 0u8;
391
392    for &(symbol, depth) in &codes_with_depths {
393        if last_depth != 0 {
394            current_code <<= depth - last_depth;
395        }
396        huffman_codes[symbol as usize] = Some((current_code, depth));
397        current_code += 1;
398        last_depth = depth;
399    }
400
401    huffman_codes
402}
403
404/// Encoder for Buriko General Interpreter/Ethornell compressed files (DSC format).
405pub struct DscEncoder<'a, T: Write + Seek> {
406    stream: MsbBitWriter<'a, T>,
407    magic: u32,
408    key: u32,
409    dec_count: u32,
410    min_len: usize,
411}
412
413impl<'a, T: Write + Seek> DscEncoder<'a, T> {
414    /// Creates a new DscEncoder with the given writer and minimum length for LZSS compression.
415    pub fn new(writer: &'a mut T, min_len: usize) -> Self {
416        let stream = MsbBitWriter::new(writer);
417        DscEncoder {
418            stream,
419            magic: 0x5344 << 16, // "DS"
420            key: rand::rng().random(),
421            dec_count: 0,
422            min_len,
423        }
424    }
425
426    /// Packs the given data into the DSC format using LZSS compression.
427    pub fn pack(mut self, data: &[u8]) -> Result<()> {
428        // LZSS compression
429        let mut ops = vec![];
430        let mut pos = 0;
431
432        const MAX_LEN: usize = 257;
433        const WINDOW_SIZE: usize = 4097;
434
435        let mut head: Vec<i32> = vec![-1; 1 << 16];
436        let mut prev: Vec<i32> = vec![-1; data.len()];
437
438        while pos < data.len() {
439            let max_len = (data.len() - pos).min(MAX_LEN);
440            let mut best_len = 0;
441            let mut best_offset = 0;
442
443            if max_len >= self.min_len {
444                let limit = pos.saturating_sub(WINDOW_SIZE);
445                let key = (data[pos] as u16) << 8 | data[pos + 1] as u16;
446                let mut match_pos_i32 = head[key as usize];
447
448                while match_pos_i32 != -1 {
449                    let match_pos = match_pos_i32 as usize;
450                    if match_pos < limit {
451                        break;
452                    }
453
454                    if data.get(match_pos + best_len) == data.get(pos + best_len) {
455                        let mut current_len = 0;
456                        for i in 0..max_len {
457                            if data.get(pos + i) != data.get(match_pos + i) {
458                                break;
459                            }
460                            current_len += 1;
461                        }
462
463                        if current_len > best_len {
464                            best_len = current_len;
465                            best_offset = pos - match_pos;
466                            if best_len >= max_len {
467                                break;
468                            }
469                        }
470                    }
471                    match_pos_i32 = prev[match_pos];
472                }
473            }
474
475            if best_len >= self.min_len && best_offset >= 2 {
476                ops.push(LzssOp::Match {
477                    len: best_len as u16,
478                    offset: best_offset as u16,
479                });
480                for i in 0..best_len {
481                    if pos + i + 1 < data.len() {
482                        let key = (data[pos + i] as u16) << 8 | data[pos + i + 1] as u16;
483                        let current_pos = pos + i;
484                        prev[current_pos] = head[key as usize];
485                        head[key as usize] = current_pos as i32;
486                    }
487                }
488                pos += best_len;
489            } else {
490                ops.push(LzssOp::Literal(data[pos]));
491                if pos + 1 < data.len() {
492                    let key = (data[pos] as u16) << 8 | data[pos + 1] as u16;
493                    prev[pos] = head[key as usize];
494                    head[key as usize] = pos as i32;
495                }
496                pos += 1;
497            }
498        }
499
500        let symbols: Vec<u16> = ops
501            .iter()
502            .map(|op| match op {
503                LzssOp::Literal(byte) => *byte as u16,
504                LzssOp::Match { len, .. } => 256 + (len - 2),
505            })
506            .collect();
507        self.dec_count = symbols.len() as u32;
508
509        let mut freqs = vec![0u32; 512];
510        for &s in &symbols {
511            freqs[s as usize] += 1;
512        }
513
514        let depths = calculate_huffman_depths(&freqs);
515        let huffman_codes = generate_canonical_codes(&depths);
516
517        self.stream.writer.write_all(b"DSC FORMAT 1.00\0")?;
518        self.stream.writer.seek(std::io::SeekFrom::Start(0x10))?;
519        self.stream.writer.write_u32(self.key)?;
520        self.stream.writer.write_u32(data.len() as u32)?;
521        self.stream.writer.write_u32(self.dec_count)?;
522        self.stream.writer.seek(std::io::SeekFrom::Start(0x20))?;
523
524        for depth in depths.iter() {
525            let key = self.update_key();
526            self.stream.writer.write_u8(depth.overflowing_add(key).0)?;
527        }
528
529        for op in &ops {
530            match op {
531                LzssOp::Literal(byte) => {
532                    let symbol = *byte as u16;
533                    let (code, len) = huffman_codes[symbol as usize].unwrap();
534                    self.stream.put_bits(code as u32, len)?;
535                }
536                LzssOp::Match { len, offset } => {
537                    let symbol = 256 + (len - 2);
538                    let (code, huff_len) = huffman_codes[symbol as usize].unwrap();
539                    self.stream.put_bits(code as u32, huff_len)?;
540                    self.stream.put_bits((*offset - 2) as u32, 12)?;
541                }
542            }
543        }
544        self.stream.flush()?;
545        Ok(())
546    }
547
548    fn update_key(&mut self) -> u8 {
549        let v0 = 20021 * (self.key & 0xffff);
550        let mut v1 = self.magic | (self.key >> 16);
551        v1 = v1
552            .overflowing_mul(20021)
553            .0
554            .overflowing_add(self.key.overflowing_mul(346).0)
555            .0;
556        v1 = (v1 + (v0 >> 16)) & 0xffff;
557        self.key = (v1 << 16) + (v0 & 0xffff) + 1;
558        v1 as u8
559    }
560}
561
562#[derive(Debug)]
563/// Builder for DSC scripts.
564pub struct DscBuilder {}
565
566impl DscBuilder {
567    /// Creates a new instance of `DscBuilder`.
568    pub fn new() -> Self {
569        DscBuilder {}
570    }
571}
572
573impl ScriptBuilder for DscBuilder {
574    fn default_encoding(&self) -> Encoding {
575        Encoding::Cp932
576    }
577
578    fn default_archive_encoding(&self) -> Option<Encoding> {
579        Some(Encoding::Cp932)
580    }
581
582    fn build_script(
583        &self,
584        buf: Vec<u8>,
585        _filename: &str,
586        _encoding: Encoding,
587        _archive_encoding: Encoding,
588        config: &ExtraConfig,
589        _archive: Option<&Box<dyn Script>>,
590    ) -> Result<Box<dyn Script>> {
591        Ok(Box::new(Dsc::new(buf, config)?))
592    }
593
594    fn extensions(&self) -> &'static [&'static str] {
595        &[]
596    }
597
598    fn script_type(&self) -> &'static ScriptType {
599        &ScriptType::BGIDsc
600    }
601
602    fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
603        if buf_len >= 16 && buf.starts_with(b"DSC FORMAT 1.00\0") {
604            return Some(255);
605        }
606        None
607    }
608
609    fn can_create_file(&self) -> bool {
610        true
611    }
612
613    fn create_file<'a>(
614        &'a self,
615        filename: &'a str,
616        mut writer: Box<dyn WriteSeek + 'a>,
617        _encoding: Encoding,
618        _file_encoding: Encoding,
619        config: &ExtraConfig,
620    ) -> Result<()> {
621        let encoder = DscEncoder::new(&mut writer, config.bgi_compress_min_len);
622        let data = crate::utils::files::read_file(filename)?;
623        encoder.pack(&data)?;
624        Ok(())
625    }
626}
627
628#[derive(Debug)]
629/// DSC script
630pub struct Dsc {
631    data: Vec<u8>,
632    min_len: usize,
633}
634
635impl Dsc {
636    /// Creates a new Dsc script
637    ///
638    /// * `buf` - The buffer containing the DSC data.
639    /// * `config` - Extra configuration options.
640    pub fn new(buf: Vec<u8>, config: &ExtraConfig) -> Result<Self> {
641        if buf.len() < 16 || !buf.starts_with(b"DSC FORMAT 1.00\0") {
642            return Err(anyhow::anyhow!("Invalid DSC format"));
643        }
644        let decoder = DscDecoder::new(&buf)?;
645        let data = decoder.unpack()?;
646        Ok(Dsc {
647            data,
648            min_len: config.bgi_compress_min_len,
649        })
650    }
651}
652
653impl Script for Dsc {
654    fn default_output_script_type(&self) -> OutputScriptType {
655        OutputScriptType::Custom
656    }
657
658    fn is_output_supported(&self, output: OutputScriptType) -> bool {
659        matches!(output, OutputScriptType::Custom)
660    }
661
662    fn default_format_type(&self) -> FormatOptions {
663        FormatOptions::None
664    }
665
666    fn custom_output_extension(&self) -> &'static str {
667        ""
668    }
669
670    fn custom_export(&self, filename: &std::path::Path, _encoding: Encoding) -> Result<()> {
671        let mut f = std::fs::File::create(filename)?;
672        f.write_all(&self.data)?;
673        Ok(())
674    }
675
676    fn custom_import<'a>(
677        &'a self,
678        custom_filename: &'a str,
679        mut file: Box<dyn WriteSeek + 'a>,
680        _encoding: Encoding,
681        _output_encoding: Encoding,
682    ) -> Result<()> {
683        let encoder = DscEncoder::new(&mut file, self.min_len);
684        let data = crate::utils::files::read_file(custom_filename)?;
685        encoder.pack(&data)?;
686        Ok(())
687    }
688}
689
690/// Parses the minimum length for LZSS compression from a string.
691pub fn parse_min_length(len: &str) -> Result<usize, String> {
692    number_range(len, 2, 256)
693}